# load the data for the main text.  This data is the same as that used in the Appendix except those
# individuals who failed the attentiveness checks are removed.
df <- read.csv("./teq-strike_app_clean.csv")
library(dplyr)
df <- df %>% filter(att1==10) %>% filter(att2==4 | att2==5)

# create the ggplot with the raw TEQ scores and strike data
library(ggplot2)
ggplot(data = df, aes(x = teq, y = strike)) + geom_jitter(height = 0.05,col="black") +
  labs(y= "", x = "") +
  theme_bw() + coord_cartesian(ylim = c(-0.2,1.2), xlim = c(20,64)) + scale_x_continuous(breaks=c(20,30,40,50,60,70)) + scale_y_continuous(breaks=c(0,1),labels = c("Not Strike","Strike"))

# create the 4 logit models
model1 <- glm(data = df, strike~teq, family = binomial("logit"))
model2 <- glm(data = df, strike~teq + white + age2 + dem + rep + education2 + female, family = binomial("logit"))
model3 <- glm(data = df, strike~teq + white + age2 + dem + rep + education2 + female + I(teq*female), family = binomial("logit"))
model4 <- glm(data = df, strike~teq + white + age2 + dem + rep + education2 + female + I(teq*female) + believe_casualties + hurt_relations + counterattack + prevent_prolif_longterm, family = binomial("logit"))

# kick out the stargazer latex code
library(stargazer)
stargazer(model1,model2,model3,model4,omit = c("Constant"),font.size = "footnotesize",style = "apsr",report=("vc*p"))

# Latex code further edited by hand for aesthetic purposes.  All values remain the same as from the stargazer output above.

# Appendix B (Power) Code

# Function to calculate the required sample size to achieve a specific power for logit models
# It is based on Phil Ender's powerlog.ado (version 1.0) for Stata, which is itself based on POWERLOG.SAS (version 1.1)
# for SAS by Michael Friendly.

powerlog <- function(p1, p2, r2, alpha, power, help= TRUE) {
  pd <- p2 - p1
  l1 <- p1 / (1-p1)
  l2 <- p2 / (1-p2)
  theta <- l2 / l1
  or <- theta
  lambda <- log(theta)
  lambda2 <- lambda^2
  za <- qnorm(1-alpha)
  
  cat("\nLogistic regression power analysis\n")
  cat("One-tailed test: alpha =", alpha, ", power =", power, ", p1 =", p1, ", p2 =", p2, ", r2 =", r2, ", odds ratio =", or, "\n\n")
  cat("n\n")
  delta <- (1 + (1 + lambda2) * exp(5 * lambda2 / 4)) / (1 + exp(-1 * lambda2 / 4))
  zb <- qnorm(power)
  N <- ((za + zb * exp(-1 * lambda2 / 4))^2 * (1 + 2 * p1 * delta)) / (p1 * lambda2)
  N <- round(N / (1-r2))
  cat(formatC(N, format="d", big.mark=","), "\n")
  
  if (help != FALSE) {
    cat("\nExplanation of terms:\n")
    cat("p1 is the probability that the outcome equals 1 when the predictor x is at the mean\n")
    cat("p2 is the probability that the outcome equals 1 when the predictor x is one standard deviation above its mean\n")
    if (r2 != 0) {
      cat("r2 is the squared mulitple correlation between the predictor variable and all other variables in the model\n")
    }
    cat("n is the sample size required for the logit model\n")
  }
  
  return(list(p1 = p1, p2 = p2, r2 = r2, alpha = alpha, power = power, n = N))
  
}

# Calculate the iv values to produce p1 (fitted value at the teq mean) and p2 (fitted value at the teq mean + one sd).
#We round because fitted values are for full values of teq since it's not a continuous variable (no fractions).

teq_mean <- round(mean(df$teq),0)
teq_mean_plus_sd <- round(mean(df$teq) + sd(data$teq),0)

# Find the fitted values for teq_mean = 40 and teq_mean_plus_sd = 50.

fitted(model)

# The fitted values are p1 = 0.88 and p2 = 0.83

# We vary alpha at 2 levels: 0.1 and 0.05
# The power is varied to give different n values for the tables.  The specific code to reproduce the Latex code is below.
# All code is for the naive model.
# r2=0 because all code is for the naive model, so there is no correlation between explanatory variables.

#Vector of powers
vector <- c(0.75, 0.76, 0.77, 0.78, 0.79, 0.80, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.90)

#Column 1 of the Appendix B table
ss90 <- powerlog(p1 = 0.88, p2 = 0.83, r2 = 0, alpha = .1, power = vector, help = TRUE)
ss90

#Column 2 of the Appendix B table
ss95 <- powerlog(p1 = 0.88, p2 = 0.83, r2 = 0, alpha = .05, power = vector, help = TRUE)
ss95

# Table A.1 is now created in LateX using the numbers from the above calculations.

# LateX syntax below
# \begin{table}[!htbp] \centering 
# \caption{Logistic Regression Power Analysis} 
# \label{} 
# \footnotesize 
# \begin{tabular}{@{\extracolsep{5pt}}lcccc} 
# \\[-1.8ex]\hline \\[-1.8ex] 
# \\[-1.8ex] & \multicolumn{2}{c}{$n$ Required} \\ 
# \\[-1.8ex] Power & $\alpha=0.1$ & $\alpha=0.05$ \\ 
# \hline \\[-1.8ex] 
# 0.75 & 81 & 115 \\ 
# 0.76 & 84 & 118 \\ 
# 0.77 & 87 & 121 \\ 
# 0.78 & 89 & 125 \\ 
# 0.79 & 92 & 128 \\ 
# 0.80 & 95 & 132 \\ 
# 0.81 & 99 & 135 \\ 
# 0.82 & 102 & 139 \\ 
# 0.83 & 106 & 143 \\ 
# 0.84 & 109 & 148 \\ 
# 0.85 & 113 & 152 \\ 
# 0.86 & 118 & 157 \\ 
# 0.87 & 122 & 163 \\ 
# 0.88 & 127 & 168 \\ 
# 0.89 & 132 & 174 \\ 
# 0.90 & 138 & 181 \\ 
# \hline \\[-1.8ex] 
# \end{tabular} 
# \label{tab:tab1}
# \end{table} 

# Appendix C code
# load the data without the filtering for attentiveness
df <- read.csv("./teq-strike_app_clean.csv")

# create the 4 logit models
model1 <- glm(data = df, strike~teq, family = binomial("logit"))
model2 <- glm(data = df, strike~teq + white + age2 + dem + rep + education2 + female, family = binomial("logit"))
model3 <- glm(data = df, strike~teq + white + age2 + dem + rep + education2 + female + I(teq*female), family = binomial("logit"))
model4 <- glm(data = df, strike~teq + white + age2 + dem + rep + education2 + female + I(teq*female) + believe_casualties + hurt_relations + counterattack + prevent_prolif_longterm, family = binomial("logit"))

# kick out the stargazer latex code for Table A.2.
stargazer(model1,model2,model3,model4,omit = c("Constant"),font.size = "footnotesize",style = "apsr",report=("vc*p"))

# Table A.2 is now created in LateX using the numbers from the above stargazer function.
